package cn.my.springHDFS.util;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import us.codecraft.webmagic.Page;
import us.codecraft.webmagic.Site;
import us.codecraft.webmagic.processor.PageProcessor;
import us.codecraft.webmagic.selector.Selectable;

import java.util.List;

/**
 * WebMagic {@link PageProcessor} that fetches a WebHDFS page and records whether
 * the page's HTML contains the word "active".
 *
 * <p>For every processed page the result is logged and stored on the shared
 * {@link Site} as the {@code IsActive} header, with the value {@code "true"} or
 * {@code "false"}.
 *
 * <p>NOTE(review): presumably callers read the flag back through
 * {@link #getSite()}; because it is stored as a Site header it may also be sent
 * with subsequent requests made through this Site — confirm against callers.
 *
 * Created by Leo on 2017/6/15.
 */
public class WebHDFSProcessor implements PageProcessor {

    private static final Logger logger = LoggerFactory.getLogger(WebHDFSProcessor.class);

    /** Name of the Site header in which the detected status is stored. */
    private static final String IS_ACTIVE_HEADER = "IsActive";

    /**
     * Pattern for "text that contains the word active".
     * The previous pattern {@code ".*active*."} applied the {@code *} to the letter
     * "e" and required one extra trailing character, so it also accepted words
     * such as "activity".
     */
    private static final String ACTIVE_REGEX = ".*active.*";

    /** Crawl settings for this processor: sleep time of 100 and a desktop Chrome user agent. */
    private final Site site = Site.me()
            .setSleepTime(100)
            .setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36");

    /**
     * Checks the downloaded page for the word "active", logs the outcome and
     * stores it on the Site under the {@code IsActive} header.
     *
     * @param page the downloaded page whose HTML is inspected
     */
    @Override
    public void process(Page page) {
        boolean active = page.getHtml().regex(ACTIVE_REGEX).match();
        // Parameterized logging: the message is only formatted when INFO is enabled.
        logger.info("IsActive: {}", active);
        // The result is handed back through the shared Site object; later pages overwrite it.
        site.addHeader(IS_ACTIVE_HEADER, String.valueOf(active));
    }

    /**
     * Returns the Site configuration used by this processor, including the
     * {@code IsActive} header once a page has been processed.
     *
     * @return the Site instance owned by this processor
     */
    @Override
    public Site getSite() {
        return site;
    }

}
